library(tidyverse)
library(ggplot2)
knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message=FALSE)Challenge 8 Solutions
Read in data:
I will be using the ‘birds’ data set and the ‘FAOstat Regions’ data set.
*Please note that I added ‘print(head())’ to each data frame to shorten the output, but the codes are not rendering.*
# read in data sets:
birds <- read_csv("_data/birds.csv")
  select(birds, -contains("Code"))# A tibble: 30,977 × 9
   Domain       Area        Element Item      Year Unit      Value Flag  Flag …¹
   <chr>        <chr>       <chr>   <chr>    <dbl> <chr>     <dbl> <chr> <chr>  
 1 Live Animals Afghanistan Stocks  Chickens  1961 1000 Head  4700 F     FAO es…
 2 Live Animals Afghanistan Stocks  Chickens  1962 1000 Head  4900 F     FAO es…
 3 Live Animals Afghanistan Stocks  Chickens  1963 1000 Head  5000 F     FAO es…
 4 Live Animals Afghanistan Stocks  Chickens  1964 1000 Head  5300 F     FAO es…
 5 Live Animals Afghanistan Stocks  Chickens  1965 1000 Head  5500 F     FAO es…
 6 Live Animals Afghanistan Stocks  Chickens  1966 1000 Head  5800 F     FAO es…
 7 Live Animals Afghanistan Stocks  Chickens  1967 1000 Head  6600 F     FAO es…
 8 Live Animals Afghanistan Stocks  Chickens  1968 1000 Head  6290 <NA>  Offici…
 9 Live Animals Afghanistan Stocks  Chickens  1969 1000 Head  6300 F     FAO es…
10 Live Animals Afghanistan Stocks  Chickens  1970 1000 Head  6000 F     FAO es…
# … with 30,967 more rows, and abbreviated variable name ¹`Flag Description`  unique(birds$Item)[1] "Chickens"               "Ducks"                  "Geese and guinea fowls"
[4] "Turkeys"                "Pigeons, other birds"    unique(birds$Flag)[1] "F"  NA   "Im" "M"  "*"  "A" birds_aggregated <- birds%>%
  filter(Flag=="A")
birds_split <- birds%>%
  filter(Flag!="A")
unique(birds_split$Flag)[1] "F"  "Im" "M"  "*" print(head(birds_split))# A tibble: 6 × 14
  Domai…¹ Domain Area …² Area  Eleme…³ Element Item …⁴ Item  Year …⁵  Year Unit 
  <chr>   <chr>    <dbl> <chr>   <dbl> <chr>     <dbl> <chr>   <dbl> <dbl> <chr>
1 QA      Live …       2 Afgh…    5112 Stocks     1057 Chic…    1961  1961 1000…
2 QA      Live …       2 Afgh…    5112 Stocks     1057 Chic…    1962  1962 1000…
3 QA      Live …       2 Afgh…    5112 Stocks     1057 Chic…    1963  1963 1000…
4 QA      Live …       2 Afgh…    5112 Stocks     1057 Chic…    1964  1964 1000…
5 QA      Live …       2 Afgh…    5112 Stocks     1057 Chic…    1965  1965 1000…
6 QA      Live …       2 Afgh…    5112 Stocks     1057 Chic…    1966  1966 1000…
# … with 3 more variables: Value <dbl>, Flag <chr>, `Flag Description` <chr>,
#   and abbreviated variable names ¹`Domain Code`, ²`Area Code`,
#   ³`Element Code`, ⁴`Item Code`, ⁵`Year Code`print(head(birds_aggregated))# A tibble: 6 × 14
  Domai…¹ Domain Area …² Area  Eleme…³ Element Item …⁴ Item  Year …⁵  Year Unit 
  <chr>   <chr>    <dbl> <chr>   <dbl> <chr>     <dbl> <chr>   <dbl> <dbl> <chr>
1 QA      Live …    5000 World    5112 Stocks     1057 Chic…    1961  1961 1000…
2 QA      Live …    5000 World    5112 Stocks     1057 Chic…    1962  1962 1000…
3 QA      Live …    5000 World    5112 Stocks     1057 Chic…    1963  1963 1000…
4 QA      Live …    5000 World    5112 Stocks     1057 Chic…    1964  1964 1000…
5 QA      Live …    5000 World    5112 Stocks     1057 Chic…    1965  1965 1000…
6 QA      Live …    5000 World    5112 Stocks     1057 Chic…    1966  1966 1000…
# … with 3 more variables: Value <dbl>, Flag <chr>, `Flag Description` <chr>,
#   and abbreviated variable names ¹`Domain Code`, ²`Area Code`,
#   ³`Element Code`, ⁴`Item Code`, ⁵`Year Code`unique(birds_aggregated$Area) [1] "World"                     "Africa"                   
 [3] "Eastern Africa"            "Middle Africa"            
 [5] "Northern Africa"           "Southern Africa"          
 [7] "Western Africa"            "Americas"                 
 [9] "Northern America"          "Central America"          
[11] "Caribbean"                 "South America"            
[13] "Asia"                      "Central Asia"             
[15] "Eastern Asia"              "Southern Asia"            
[17] "South-eastern Asia"        "Western Asia"             
[19] "Europe"                    "Eastern Europe"           
[21] "Northern Europe"           "Southern Europe"          
[23] "Western Europe"            "Oceania"                  
[25] "Australia and New Zealand" "Melanesia"                
[27] "Micronesia"                "Polynesia"                FAO_region <- read_csv("_data\\FAOSTAT_country_groups.csv")
FAO_region_country <- FAO_region%>%
  select(`Country Group`, `Country`)%>%
  rename(country_group = `Country Group`)%>%
  distinct()
head(FAO_region_country)# A tibble: 6 × 2
  country_group Country     
  <chr>         <chr>       
1 Africa        Algeria     
2 Africa        Angola      
3 Africa        Benin       
4 Africa        Botswana    
5 Africa        Burkina Faso
6 Africa        Burundi     unique(FAO_region_country$country_group) [1] "Africa"                                                             
 [2] "Americas"                                                           
 [3] "Annex I countries"                                                  
 [4] "Antarctic Region"                                                   
 [5] "Asia"                                                               
 [6] "Australia and New Zealand"                                          
 [7] "Caribbean"                                                          
 [8] "Caucasus and Central Asia"                                          
 [9] "Central America"                                                    
[10] "Central Asia"                                                       
[11] "Central Asia and Southern Asia"                                     
[12] "Eastern Africa"                                                     
[13] "Eastern Asia"                                                       
[14] "Eastern Asia (excluding China)"                                     
[15] "Eastern Asia (excluding Japan and China)"                           
[16] "Eastern Asia (excluding Japan)"                                     
[17] "Eastern Asia and South-eastern Asia"                                
[18] "Eastern Europe"                                                     
[19] "Europe"                                                             
[20] "European Union (27)"                                                
[21] "High-income economies"                                              
[22] "Land Locked Developing Countries"                                   
[23] "Latin America and the Caribbean"                                    
[24] "Least Developed Countries"                                          
[25] "Low income economies"                                               
[26] "Low Income Food Deficit Countries"                                  
[27] "Lower-middle-income economies"                                      
[28] "Melanesia"                                                          
[29] "Micronesia"                                                         
[30] "Middle Africa"                                                      
[31] "Net Food Importing Developing Countries"                            
[32] "Non-Annex I countries"                                              
[33] "North and Central America"                                          
[34] "Northern Africa"                                                    
[35] "Northern Africa (excluding Sudan)"                                  
[36] "Northern America"                                                   
[37] "Northern America and Europe"                                        
[38] "Northern Europe"                                                    
[39] "Oceania"                                                            
[40] "Oceania excluding Australia and New Zealand"                        
[41] "OECD"                                                               
[42] "Polynesia"                                                          
[43] "Small Island Developing States"                                     
[44] "South America"                                                      
[45] "South-eastern Asia"                                                 
[46] "Southern Africa"                                                    
[47] "Southern Asia"                                                      
[48] "Southern Asia (excluding India)"                                    
[49] "Southern Europe"                                                    
[50] "Sub-Saharan Africa"                                                 
[51] "Sub-Saharan Africa (including Sudan)"                               
[52] "Upper-middle-income economies"                                      
[53] "Western Africa"                                                     
[54] "Western Asia"                                                       
[55] "Western Asia (exc. Armenia, Azerbaijan, Cyprus, Israel and Georgia)"
[56] "Western Asia and Northern Africa"                                   
[57] "Western Europe"                                                     
[58] "World"                                                              # test:
FAO_region_country%>%
  filter(country_group == "Africa")# A tibble: 63 × 2
   country_group Country                 
   <chr>         <chr>                   
 1 Africa        Algeria                 
 2 Africa        Angola                  
 3 Africa        Benin                   
 4 Africa        Botswana                
 5 Africa        Burkina Faso            
 6 Africa        Burundi                 
 7 Africa        Cabo Verde              
 8 Africa        Cameroon                
 9 Africa        Central African Republic
10 Africa        Chad                    
# … with 53 more rowsBriefly describe the data
The ‘birds’ data set contains information related to stocks of different types of poultry: chickens, ducks, geese and guinea fowls, turkeys, and pigeons or other birds. After filtering out the aggregate data by area (my new data set now has 14 columns and 13716 rows), we can see the 28 regions included in the data set.
The ‘FAO_region_country’ data set contains information from the United Nation’s Food and Agriculture Association. This data set includes more detailed information related to the regions referenced in the ‘birds’ data set, specifically, the countries that are in each region. After creating a subset of the original data set that includes only “country” and “country_group”, I now have a data set with 2 columns and 1943 rows of data.
Tidy Data (as needed)
After reviewing the values within the ‘FAO_region_country’ data set, I can see that there is a value for “World” in the “country_group” column. So, I will remove this, as its data will overlap with that of other distinct countries.
install.packages("kableExtra")Error in contrib.url(repos, "source"): trying to use CRAN without setting a mirrorlibrary(kableExtra)
library(readxl)
# remove "World" group
FAO_region_cln <- FAO_region_country%>%
  filter(country_group!="World")
# create kable:
FAO_region_cln%>%
  group_by(country_group)%>%
  summarize(n=n())%>%
  arrange(desc(n))# A tibble: 57 × 2
   country_group                               n
   <chr>                                   <int>
 1 Non-Annex I countries                     161
 2 Net Food Importing Developing Countries    81
 3 Annex I countries                          78
 4 High-income economies                      64
 5 Africa                                     63
 6 Europe                                     63
 7 Americas                                   61
 8 Small Island Developing States             58
 9 Upper-middle-income economies              56
10 Low Income Food Deficit Countries          55
# … with 47 more rowshalf <- c(1:round(nrow(FAO_region_cln)/2))
knitr::kable(list(FAO_region_cln[half,],  
           matrix(numeric(), nrow=0, ncol=1),
           FAO_region_cln[-half,]), 
           caption = "Countries in Country Groups")%>%
  kableExtra::kable_styling(font_size=12)| 
 |  | 
 | 
There is still some overlap among country groups even after removing “World.” I need to extract country-level or regional groupings to see what information was aggregated in the original data set. Using Professor Rolfe’s example, I will identify four major grouping categories (annex, income, OECD, and major region) and the 277 countries nested within those categories.
FAO_region_cln%>%
  summarise(n=n())/277        n
1 6.01444FAO_region_cln%>%
  filter(str_detect(country_group, "[aA]nnex"))%>%
  group_by(country_group)%>%
  summarise(n=n())# A tibble: 2 × 2
  country_group             n
  <chr>                 <int>
1 Annex I countries        78
2 Non-Annex I countries   161FAO_region_cln%>%
  filter(str_detect(country_group, "[aA]nnex"))%>%
  summarise(n=n())# A tibble: 1 × 1
      n
  <int>
1   239FAO_region_cln%>%
  filter(str_detect(country_group, "[iI]ncome"))%>%
  group_by(country_group)%>%
  summarise(n=n())# A tibble: 5 × 2
  country_group                         n
  <chr>                             <int>
1 High-income economies                64
2 Low income economies                 34
3 Low Income Food Deficit Countries    55
4 Lower-middle-income economies        46
5 Upper-middle-income economies        56FAO_region_cln%>%
  filter(str_detect(country_group, "[iI]ncome"))%>%
  summarise(n=n())# A tibble: 1 × 1
      n
  <int>
1   255FAO_region_cln%>%
  filter(str_detect(country_group, "[Dd]evelop|OECD"))%>%
  group_by(country_group)%>%
  summarise(n=n())# A tibble: 5 × 2
  country_group                               n
  <chr>                                   <int>
1 Land Locked Developing Countries           32
2 Least Developed Countries                  51
3 Net Food Importing Developing Countries    81
4 OECD                                       36
5 Small Island Developing States             58FAO_region_cln%>%
  filter(str_detect(country_group, "[Dd]evelop|OECD"))%>%
  summarise(n=n())# A tibble: 1 × 1
      n
  <int>
1   258# group by major regions:
major_regions <- c("Africa", "Asia", "Europe", "Americas", 
                 "Oceania", "Antarctic Region")
FAO_region_cln%>%
  filter(country_group %in% major_regions)%>%
  summarise(n=n())# A tibble: 1 × 1
      n
  <int>
1   277I will now pivot the data and create four new categorical variables corresponding to the four groupings (annex, income, OECD, and major region) that contain most or all of the 277 countries included in the ’FAO_region_cln” data set.
# pivot and unite
FAO_regions_wide <- FAO_region_cln%>%
  pivot_wider(names_from=country_group, values_from = 1)%>%
  unite("gp_annex", contains("Annex"), 
        sep="", na.rm=TRUE, remove=TRUE)%>%
  unite("gp_major_region", any_of(major_regions), 
        sep="", na.rm=TRUE, remove=TRUE)%>%
  unite("gp_income", contains("Income")|contains("income"),
        sep="", na.rm=TRUE, remove=TRUE)%>%
  unite("gp_develop", contains("Develop")|contains("OECD"),
        sep="", na.rm=TRUE, remove=TRUE)%>%
  select(Country, starts_with("gp"))Join Data
I will now join the ‘birds_split’ data set (“Area”) with the pivoted ‘FAO_regions_wide’ data set (country and regional variables). I will do a left join, as each case includes a country, and each country should match the four regional indicators. I will also set the two key fields as the “Area” variable in the ‘birds_split’ data set and the “country” variable in the ‘FAO_regions_wide’ data set.
I can see based on the number of observations that the joined data set includes the correct number of observations (13716).
nrow(birds_split)[1] 13716birds_join <- left_join(birds_split, FAO_regions_wide,
                   by = c("Area" = "Country"))